#!/bin/python
import sys
import os
import bs4
from bs4 import BeautifulSoup

# Host name of the original site. dive() looks for it inside link targets and
# swaps it for new_root. Replaced by the second command-line argument if given.
root = 'strlst.myogaya.jp'
# Host name used when building gemini:// links for the converted pages.
# Replaced by the third command-line argument if given.
new_root = 'localhost'

def verify(filepath):
    """Return True when *filepath* names an existing file or directory."""
    # The original body referenced an undefined name (``file_path``) and
    # raised NameError on every call.
    return os.path.exists(filepath)

def convert(filepath):
    """Mirror the HTML tree under *filepath* into the current directory as gemtext.

    Every sub-directory of *filepath* is recreated relative to the current
    working directory, and every ``*.html`` file (extension matched
    case-insensitively) is handed to ``process_file`` together with an open
    ``.gmi`` output file and the ``gemini://`` URL of the directory it lives in.

    Does nothing when *filepath* is not a directory.
    """
    if not os.path.isdir(filepath):
        return

    for current_dir, _subdirs, files in os.walk(filepath):
        # relpath strips only the leading source root; the previous
        # str.replace() removed every occurrence of the root string and
        # produced doubled slashes when the root had no trailing separator.
        rel_dir = os.path.relpath(current_dir, filepath)
        at_top = rel_dir == os.curdir
        target_root = os.curdir if at_top else os.path.join(os.curdir, rel_dir)
        if not at_top:
            # Tolerates an already existing directory but lets real failures
            # (permissions, a file in the way) surface instead of hiding them.
            os.makedirs(target_root, exist_ok=True)

        for file in files:
            stem, extension = os.path.splitext(file)
            if extension.lower() != '.html':
                continue
            # Swap only the extension; "html" elsewhere in the path is kept.
            target_file = os.path.join(target_root, stem + '.gmi')
            # Links always use forward slashes and end with one so that
            # relative hrefs can simply be appended.
            link_dir = '' if at_top else rel_dir.replace(os.sep, '/') + '/'
            target_link = 'gemini://{}/{}'.format(new_root, link_dir)
            # Gemtext is UTF-8 unless the server says otherwise.
            with open(target_file, 'w', encoding='utf-8') as opened_file:
                process_file(os.path.join(current_dir, file), opened_file, target_link)

def process_file(source, target_file, target_link):
    """Parse the HTML file at *source* and write its gemtext to *target_file*.

    source      -- path of the HTML file to read
    target_file -- writable text file object receiving the output
    target_link -- gemini:// URL prefix passed through to ``dive`` for
                   resolving relative links

    Documents without a ``<body>`` (for example empty files) produce no
    output instead of raising AttributeError.
    """
    with open(source, 'r') as html_file:
        # Newlines are removed before parsing, so text split across lines is
        # joined without a separator.
        html = html_file.read().replace('\n', '')
    soup = BeautifulSoup(html, features='lxml')
    body = soup.body
    if body is None:
        # Nothing to convert; the parser found no body element.
        return
    dive(body, target_file, target_link)

# gemtext heading markers for the HTML heading levels that are converted
_HEADING_PREFIXES = {'h1': '#', 'h2': '##', 'h3': '###'}


def _swap_html_suffix(url):
    """Return *url* with a trailing ``.html`` on its path replaced by ``.gmi``.

    Only the extension is touched: a ``?query`` or ``#fragment`` tail is kept
    and other occurrences of ``html`` inside the URL are left alone.
    """
    cut = len(url)
    for marker in ('?', '#'):
        pos = url.find(marker)
        if pos != -1:
            cut = min(cut, pos)
    path, tail = url[:cut], url[cut:]
    if path.lower().endswith('.html'):
        path = path[:-len('.html')] + '.gmi'
    return path + tail


def _has_scheme(href):
    """Return True when *href* starts with a URL scheme such as ``mailto:``."""
    scheme, colon, _rest = href.partition(':')
    return (bool(colon) and scheme[:1].isalpha()
            and all(ch.isalnum() or ch in '+-.' for ch in scheme))


def _rewrite_link(href, target_link):
    """Translate an HTML link target into the URL written to the gemtext file.

    * http(s) links to the host ``root`` become ``gemini://new_root/...``
      with a trailing ``.html`` turned into ``.gmi``;
    * other links that carry a scheme are returned unchanged;
    * links starting with ``/`` are resolved against ``gemini://new_root``;
    * everything else is appended to *target_link* (the page's directory).
    """
    lowered = href.lower()
    for scheme in ('https://', 'http://'):
        if lowered.startswith(scheme):
            rest = href[len(scheme):]
            # Require the host to end right after ``root`` so that longer
            # host names merely starting with it are not rewritten.
            if rest.startswith(root) and rest[len(root):len(root) + 1] in ('', '/', '?', '#'):
                return _swap_html_suffix('gemini://' + new_root + rest[len(root):])
            return href
    if _has_scheme(href):
        return href
    if href.startswith('/'):
        return _swap_html_suffix('gemini://' + new_root + href)
    return _swap_html_suffix(target_link + href)


def dive(tag, target_file, target_link):
    """Recursively write the gemtext for *tag* and its descendants.

    tag         -- a BeautifulSoup Tag or NavigableString
    target_file -- writable text file object receiving the output
    target_link -- gemini:// URL prefix used for relative links

    An element is only written when it wraps exactly one piece of text
    (``tag.string`` is set): h1-h3 become headings, ``li`` a bullet, ``a`` a
    link line with its label, ``p`` and ``tr`` a paragraph. Text sitting
    directly inside an ``li`` with several children is written as a bullet
    of its own.
    """
    if isinstance(tag, bs4.element.NavigableString):
        parent = tag.parent
        # A lone string child was already emitted by its <li> via .string;
        # writing it here as well would duplicate the bullet. Whitespace-only
        # strings are skipped so they do not produce empty bullets.
        if parent.name == 'li' and len(parent.contents) > 1 and tag.strip():
            target_file.write('* {}\n'.format(str(tag)))
        return

    text = tag.string
    if text:
        name = tag.name
        if name in _HEADING_PREFIXES:
            target_file.write('{} {}\n'.format(_HEADING_PREFIXES[name], text))
        elif name == 'li':
            target_file.write('* {}\n'.format(text))
        elif name == 'a':
            # Anchors without an href (e.g. named anchors) have no target.
            href = tag.get('href')
            if href is not None:
                # gemtext link line: "=> URL label"
                target_file.write('=> {} {}\n'.format(_rewrite_link(href, target_link), text))
        elif name in ('p', 'tr'):
            target_file.write('{}\n\n'.format(text))

    for child in tag.children:
        dive(child, target_file, target_link)

# Command-line entry point: convert the HTML tree given as first argument.
# Optional second and third arguments override the module-level ``root`` and
# ``new_root`` host names before conversion starts.
if __name__ == "__main__":
    usage = """usage: ./main.py html-root [root url] [new root url]"""
    if len(sys.argv) < 2:
        print('supply filepath', file=sys.stderr)
        print(usage, file=sys.stderr)
        # sys.exit is always available; the bare ``exit`` helper is only
        # injected by the ``site`` module.
        sys.exit(1)
    # convert() only handles directories, so reject anything else up front
    # instead of exiting successfully without having converted anything.
    if not os.path.isdir(sys.argv[1]):
        print('supply valid filepath', file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(1)
    if len(sys.argv) > 2:
        root = sys.argv[2]
    if len(sys.argv) > 3:
        new_root = sys.argv[3]
    convert(sys.argv[1])
